import sys
print sys.version
from joblib import Parallel, delayed
import multiprocessing
nCores = multiprocessing.cpu_count() - 2 # Allow other apps to run
print 'nCores: %d' % (nCores)
from datetime import datetime, time
print 'now: %s' % str(datetime.now())
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display, Image
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.lib import grid
from rpy2.robjects.lib import ggplot2
import rpy2.robjects.pandas2ri
import numpy as np
np.set_printoptions(precision=4, suppress=True)
import os
import pandas as pd
from scipy import ndimage
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle
import tensorflow as tf
print 'tf.__version__:%s' % str(tf.__version__)
%run img_utils.py
%run img_glbSpec_SFDD_ImgSz_64.py
# print '\nglbDataFile: %s' % (glbDataFile)
print '\nglbRspClass: %s' % (glbRspClass)
print 'glbRspClassN: %d' % (glbRspClassN)
print 'glbRspClassDesc: '; print(glbRspClassDesc)
print '\nglbImg:'; print(glbImg)
print '\nglbTfwVarSeed: %d' % (glbTfwVarSeed)
print '\nglbPickleFile: %s' % (glbPickleFile)
%run img_utils.py
# Load the pickled observation splits (Fit / Vld / New) produced upstream;
# myimportDbs comes from img_utils.py (%run above).
glbObsFitIdn, glbObsFitFtr, glbObsFitRsp, \
glbObsVldIdn, glbObsVldFtr, glbObsVldRsp, \
glbObsNewIdn, glbObsNewFtr, glbObsNewRsp, \
sbtNewCorDf, \
_ = myimportDbs(glbPickleFile['data'])
# Trn = Fit + Vld stacked together (identifiers, features, responses).
glbObsTrnIdn = glbObsFitIdn + glbObsVldIdn
glbObsTrnFtr = np.vstack((glbObsFitFtr, glbObsVldFtr))
glbObsTrnRsp = np.concatenate((glbObsFitRsp, glbObsVldRsp))
print('Fit pickled set:',
      len(glbObsFitIdn), glbObsFitFtr.shape, glbObsFitRsp.shape)
print('Vld pickled set:',
      len(glbObsVldIdn), glbObsVldFtr.shape, glbObsVldRsp.shape)
print('Trn pickled set:',
      len(glbObsTrnIdn), glbObsTrnFtr.shape, glbObsTrnRsp.shape)
print('New pickled set:',
      len(glbObsNewIdn), glbObsNewFtr.shape, glbObsNewRsp.shape)
First, reload the data we generated in 1_notmnist.ipynb.
# pickle_file = 'data/notMNIST.pickle'
# with open(pickle_file, 'rb') as f:
# save = pickle.load(f)
# glbXFit = save['glbXFit']
# glbYFit = save['glbYFit']
# glbXVld = save['glbXVld']
# glbYVld = save['glbYVld']
# glbXNew = save['glbXNew']
# glbYNew = save['glbYNew']
# del save # hint to help gc free up memory
# print('Training set', glbXFit.shape, glbYFit.shape)
# print('Validation set', glbXVld.shape, glbYVld.shape)
# print('Test set', glbXNew.shape, glbYNew.shape)
Reformat into a shape that's more adapted to the models we're going to train:
def lclreformatData(I, X, Y, imgSize = None, nClasses = None):
    """Flatten image features and one-hot encode integer labels.

    Parameters:
        I: sequence of observation identifiers (returned unchanged).
        X: ndarray of images; reshaped to (nObs, imgSize * imgSize) float32.
        Y: 1-D ndarray of integer class labels; one-hot encoded to float32.
        imgSize: image side length; defaults to the notebook global
            glbImg['size'] (backward compatible with the original).
        nClasses: number of response classes; defaults to glbRspClassN.

    Returns:
        (I, X, Y) with X flattened and Y one-hot encoded.
    """
    if imgSize is None:
        imgSize = glbImg['size']
    if nClasses is None:
        nClasses = glbRspClassN
    X = X.reshape((-1, imgSize * imgSize)).astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
    Y = (np.arange(nClasses) == Y[:, None]).astype(np.float32)
    return I, X, Y
# Reformat every split: flatten features, one-hot encode the responses.
glbITrn, glbXTrn, glbYTrn = lclreformatData(
    glbObsTrnIdn, glbObsTrnFtr, glbObsTrnRsp)
glbIFit, glbXFit, glbYFit = lclreformatData(
    glbObsFitIdn, glbObsFitFtr, glbObsFitRsp)
glbIVld, glbXVld, glbYVld = lclreformatData(
    glbObsVldIdn, glbObsVldFtr, glbObsVldRsp)
glbINew, glbXNew, glbYNew = lclreformatData(
    glbObsNewIdn, glbObsNewFtr, glbObsNewRsp)
print('Trn reshaped set:', len(glbITrn), glbXTrn.shape, glbYTrn.shape)
print('Fit reshaped set:', len(glbIFit), glbXFit.shape, glbYFit.shape)
print('Vld reshaped set:', len(glbIVld), glbXVld.shape, glbYVld.shape)
print('New reshaped set:', len(glbINew), glbXNew.shape, glbYNew.shape)
# Check how much incremental memory is used for Fit obs
# del glbObsFitIdn, glbObsFitFtr, glbObsFitRsp
# del glbIFit, glbXFit, glbYFit
# Check how much incremental memory is used for Trn obs.
# Report the shape BEFORE freeing; the original printed glbObsTrnFtr.shape
# AFTER the del, which raises NameError.
print(glbObsTrnFtr.shape)
# NOTE(review): glbXTrn/glbYTrn are used again further down the notebook
# (final-model fit); re-run the reformat cell before that if freed here.
del glbObsTrnIdn, glbObsTrnFtr, glbObsTrnRsp
del glbITrn, glbXTrn, glbYTrn
Turn the logistic regression example with SGD into a 1-hidden layer neural network with rectified linear units (nn.relu()) and nRELUs hidden nodes. This model should improve your validation / test accuracy_score.
%run img_glbSpec_SFDD_ImgSz_64.py
%run img_utils.py
# Add parameter for lrnRateTfw = 0.5 for GradientDescentOptimizer
def fitMdlMLPRELUSGDTfw(lclXFit, lclYFit,
                        nObsFit = 100, nObsBtc = 16,
                        rotatePby = 0.1, rotateMaxAgl = 5,
                        nRELUs = 128,
                        nStepsTfw = 10, lrnRateTfw = 0.1,
                        visualize = False, newObs = False, verbose = False):
    # Fit a 1-hidden-layer RELU MLP with minibatch SGD (TensorFlow 0.x API).
    #
    # Parameters:
    #   lclXFit, lclYFit: flattened float32 features & one-hot responses
    #       (as produced by lclreformatData).
    #   nObsFit / nObsBtc: training-subset size / minibatch size.
    #   rotatePby / rotateMaxAgl: probability that a minibatch is randomly
    #       rotated / max absolute rotation angle in degrees (augmentation).
    #   nRELUs: hidden-layer width.
    #   nStepsTfw / lrnRateTfw: SGD step count / learning rate.
    #   visualize / newObs / verbose: reporting & new-obs-scoring switches.
    #
    # Returns (mdlDf, lclYVldPby, lclYNewPby): a 1-row results DataFrame,
    # validation class probabilities, and new-obs probabilities (None when
    # newObs is False).
    #
    # Relies on notebook globals (glbXVld, glbYVld, glbXNew, glbYNew, glbImg,
    # glbRspClass*, glbTfwVarSeed, glbNPySeed, glbObsVldRsp, glbObsNewRsp,
    # glbIVld, glbINew, ...) and img_utils helpers (mydspVerboseTrigger,
    # mygetMetricLogLoss, mydisplayImagePredictions, plot_occlusion).
    from scipy.ndimage import rotate
    from sklearn import metrics as skl_metrics
    # NOTE(review): banner string says 'Logistic Regression' but this cell
    # fits the MLP -- looks copied from the LgtRgr notebook; confirm before
    # trusting run logs.
    prtStr = '\nLogistic Regression (TensorFlow): ' + \
        "nObsFit:%5d; nObsBtc:%5d; " + \
        "rotatePby: %.4f; rotateMaxAgl: %3d; " + \
        "nRELUs: %5d; " + \
        "nStepsTfw:%5d; lrnRateTfw:%.4f "
    print(prtStr % ( \
        nObsFit, nObsBtc, \
        rotatePby, rotateMaxAgl, \
        nRELUs, \
        nStepsTfw, lrnRateTfw))
    print(" visualize: %s; newObs: %s; verbose: %s" % ( \
        visualize, newObs, verbose))
    # Ensure each Fit obs used at least once during training
    if (nStepsTfw * nObsBtc < nObsFit):
        print " nStepsTfw * nObsBtc < nObsFit: %5d < %5d" % (\
            nStepsTfw * nObsBtc, nObsFit)
        nStepsTfw = int(nObsFit / nObsBtc + 1)
        print " overriding nStepsTfw: %5d" % (nStepsTfw)
    startTm = datetime.now()
    # One-row frame accumulating hyper-parameters and, later, results.
    mdlDf = pd.DataFrame({'id': 'MLP.SGD.tfw',
                          'nObsFit': [nObsFit],
                          'nObsBtc': [nObsBtc],
                          'rotatePby': [rotatePby],
                          'rotateMaxAgl': [rotateMaxAgl],
                          'nRELUs': [nRELUs],
                          'nStepsTfw': [nStepsTfw],
                          'lrnRateTfw': [lrnRateTfw]
                          })
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        # The training data, we use a placeholder that will be fed
        # at run time with a training minibatch.
        # The validation data into constants that
        # are attached to the graph.
        # The tests data is loaded by batch thru a placeholder
        # tfwXFit = tf.constant(lclXFit[:nObsFit, :])
        # tfwYFit = tf.constant(lclYFit[:nObsFit])
        tfwXFit = tf.placeholder(tf.float32,
                                 shape = (nObsBtc, lclXFit.shape[1]))
        tfwYFit = tf.placeholder(tf.float32,
                                 shape = (nObsBtc, lclYFit.shape[1]))
        tfwXVld = tf.constant(glbXVld)
        tfwYVld = tf.constant(glbYVld)
        # NOTE(review): the new-obs placeholder batch size is glbImg['size']
        # (the image side length), not a dedicated batch constant -- the
        # newObs scoring loop below depends on this coupling; confirm it is
        # intentional.
        tfwXNew = tf.placeholder(tf.float32,
                                 shape = (glbImg['size'], lclXFit.shape[1]))
        tfwYNew = tf.placeholder(tf.float32,
                                 shape = (glbImg['size'], lclYFit.shape[1]))
        # Variables.
        tf.set_random_seed(glbTfwVarSeed)
        # These are the parameters that we are going to be training.
        # The weight matrix will be initialized using random valued
        # following a (truncated) normal distribution.
        # The bias vector get initialized to zero.
        tfwW1 = tf.Variable(
            tf.truncated_normal([glbImg['size'] * glbImg['size'],
                                 nRELUs]),
            name = 'tfwW1')
        tfwB1 = tf.Variable(tf.zeros([nRELUs]), name = 'tfwB1')
        tfwW3 = tf.Variable(
            tf.truncated_normal([nRELUs,
                                 glbRspClassN]),
            name = 'tfwW3')
        tfwB3 = tf.Variable(tf.zeros([glbRspClassN]), name = 'tfwB3')
        if (verbose):
            print(' tfwW1:', tfwW1.initialized_value())
            print(' tfwB1:', tfwB1.initialized_value())
            print(' tfwW3:', tfwW3.initialized_value())
            print(' tfwB3:', tfwB3.initialized_value())
        # print 'lblIx:%2d:%s'% \
        # (np.vectorize("%.4e".__mod__)(tfwW.value()[:5, lblIx]))
        # Shared forward pass: input -> affine -> RELU -> affine (logits).
        def model(X):
            lyr1 = tf.matmul(X, tfwW1) + tfwB1
            lyr2 = tf.nn.relu(lyr1)
            YLgt = tf.matmul(lyr2, tfwW3) + tfwB3
            return(YLgt)
        # Training computation.
        # We multiply the inputs with the weight matrix, and add bias.
        # We compute the softmax and cross-entropy (it's one operation in
        # TensorFlow, because it's very common, and it can be optimized).
        # We take the average of this cross-entropy across all training
        # examples: that's our loss.
        logits = model(tfwXFit)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, tfwYFit))
        # Optimizer.
        # We are going to find the minimum of this loss using
        # gradient descent.
        optimizer = (tf.train
                     .GradientDescentOptimizer(tf.to_float(lrnRateTfw))
                     .minimize(loss))
        # Predictions for the training, validation, and test data.
        # These are not part of training, but merely here so that we can
        # report accuracy_score figures as we train.
        tfwYFitPby = tf.nn.softmax(logits)
        tfwYVldPby = tf.nn.softmax(model(tfwXVld))
        tfwYNewPby = tf.nn.softmax(model(tfwXNew))
    # Fraction of rows whose argmax prediction matches the argmax label.
    def accuracy_score(predictions, labels):
        return (1.0 * np.sum(np.argmax(predictions, 1) ==
                             np.argmax(labels, 1))
                / predictions.shape[0])
    tf.set_random_seed(glbTfwVarSeed)
    # For image rotation in feed_dict. Separate seed to ensure
    # deterministic performance by tf session irrespective of
    # rotation randomization
    np.random.seed(glbNPySeed)
    with tf.Session(graph=graph) as session:
        # This is a one-time operation which ensures the parameters get
        # initialized as we described in the graph:
        # random tfwW for the matrix, zeros for the tfwB.
        tf.initialize_all_variables().run()
        if verbose:
            print(' Initialized')
        # Create a dummy feed for test data & occlusion visualization
        # btcNewDct = {tfwXNew: glbXNew[:glbImg['size'], :]}
        for step in range(int(nStepsTfw)):
            # Pick an offset within the training data, which has been
            # randomized.
            # Note: we could use better randomization across epochs.
            offset = (step * nObsBtc) % (nObsFit - nObsBtc)
            # Generate a minibatch (w/ or w/o rotation)
            if (np.random.rand() > rotatePby):
                btcXFit = lclXFit[offset:(offset + nObsBtc), :]
            else:
                rawXFit = np.reshape(lclXFit[offset:(offset + nObsBtc), :],
                    (nObsBtc, glbImg['size'], glbImg['size']))
                rttXFit = np.zeros_like(rawXFit)
                # Uniform angle in [-rotateMaxAgl, +rotateMaxAgl] degrees.
                angle = (np.random.rand() - 0.5) * 2 * rotateMaxAgl
                if (verbose):
                    prtStr = ' step %5d(%5d secs): Minibatch rotation:' + \
                        "angle: %.4f"
                    # NOTE(review): thsDrn is only assigned further below
                    # (inside the mydspVerboseTrigger branch); this print
                    # raises NameError on the first rotated verbose step.
                    print(prtStr % \
                        (step, thsDrn, \
                         angle))
                # print ' rawXFit.shape: %s' % (str(rawXFit.shape))
                for ix in xrange(rawXFit.shape[0]):
                    # if (ix % 10 == 0):
                    #     print ' rawXFit[ix, :, :].shape: %s' % \
                    #         (str(rawXFit[ix, :, :].shape))
                    rttXFit[ix, :, :] = rotate(rawXFit[ix, :, :], angle,
                        mode = 'nearest', reshape = False)
                btcXFit = np.reshape(rttXFit,
                    (nObsBtc, glbImg['size'] * glbImg['size']))
            btcYFit = lclYFit[offset:(offset + nObsBtc), :]
            # Prepare a dictionary telling the session where to feed the
            # minibatch. The key of the dictionary is the placeholder node
            # of the graph to be fed, and the value is the numpy array to
            # feed to it.
            feed_dict = {tfwXFit: btcXFit, tfwYFit: btcYFit,
                         tfwXNew: glbXNew[:glbImg['size'], :]}
            # Run the computations.
            # We tell .run() that we want to run the optimizer,
            # and get the loss value and the training predictions returned
            # as numpy arrays.
            _, l, predictions = \
                session.run([optimizer, loss, tfwYFitPby],
                            feed_dict = feed_dict)
            if mydspVerboseTrigger(step):
                thsDrn = (datetime.now() - startTm).seconds
                # Only report progress once the run is slow (> 100 secs).
                if (thsDrn > 100):
                    prtStr = ' step %5d(%5d secs): Minibatch ' + \
                        "accuracy: %.4f; logloss: %.4f"
                    print(prtStr % \
                        (step, thsDrn,
                         accuracy_score(predictions, btcYFit), l))
        # Calling .eval() on tfwObsVldPred is basically like calling run(),
        # but just to get that one numpy array.
        # Note that it recomputes all its graph dependencies.
        lclYVldPby = tfwYVldPby.eval()
        lclYVldPdn = np.argmax(lclYVldPby, 1)
        mdlDf['accVld'] = accVld = accuracy_score(lclYVldPby, glbYVld)
        cnfYVld = skl_metrics.confusion_matrix(glbObsVldRsp, lclYVldPdn)
        # Per-class accuracy: diagonal / row totals of the confusion matrix.
        accYVldCls = cnfYVld.diagonal() * 1.0 / cnfYVld.sum(axis = 1)
        mdlDf['accVldCls'] = None
        # NOTE(review): DataFrame.set_value is deprecated/removed in newer
        # pandas; kept for the pinned environment of this notebook.
        mdlDf.set_value(0, 'accVldCls', {'accCls' : accYVldCls})
        mdlDf['logLossVld'] = logLossVld = skl_metrics.log_loss(
            glbYVld, lclYVldPby)
        logLossVldCls = mygetMetricLogLoss(glbYVld, lclYVldPby,
                                           returnTyp = 'class')
        mdlDf['logLossVldCls'] = None
        mdlDf.set_value(0, 'logLossVldCls',
                        {'logLossCls' : logLossVldCls})
        if verbose:
            print '\n Vld accuracy:%0.4f' % (accVld)
            print accYVldCls
            print cnfYVld
            # Axis labels: '<desc>:<class>:actl' rows, 'pred:<class>:<desc>'
            # columns for the confusion-matrix heat map.
            yLbl = [glbRspClassDesc[glbRspClass[ix]] + ':' + \
                    glbRspClass[ix] + ':actl' \
                    for ix in xrange(glbRspClassN)]
            xLbl = ['pred:' + glbRspClass[ix] + ':' + \
                    glbRspClassDesc[glbRspClass[ix]] \
                    for ix in xrange(glbRspClassN)]
            # print labels
            plt.matshow(cnfYVld, cmap='Reds', interpolation='none')
            plt.yticks(np.arange(10), yLbl)
            plt.xticks(np.arange(10), xLbl, rotation=90);
            plt.show()
            print '\n Vld logLoss:%0.4f' % (logLossVld)
            print logLossVldCls
        if visualize:
            mydisplayImagePredictions(session, tfwW1.eval(),
                glbIVld, glbObsVldFtr, glbObsVldRsp, lclYVldPby,
                glbRspClass, glbRspClassDesc, imgVisualFn = plot_occlusion,
                tfwXOcc = tfwXNew, tfwYOccPby = tfwYNewPby)
        if newObs:
            print " predicting %5d new obs..." % (glbYNew.shape[0])
            # Probabilities are collected batch-by-batch; -1.0 marks cells
            # not yet filled so completeness can be asserted afterwards.
            lclYNewPby = np.zeros((glbYNew.shape[0],
                                   tfwYFitPby.get_shape().as_list()[1]))
            lclYNewPby[:, :] = -1.0
            btcSz = tfwXNew.get_shape().as_list()[0]
            for obsIx in xrange(0, glbYNew.shape[0], btcSz):
                if mydspVerboseTrigger(obsIx) and \
                    (datetime.now() - startTm).seconds > 60:
                    print " @%5d secs: obsIx: %5d" % \
                        ((datetime.now() - startTm).seconds, obsIx)
                obsEnd = obsIx + btcSz
                if obsEnd > lclYNewPby.shape[0]:
                    obsEnd = lclYNewPby.shape[0]
                # The last (short) batch is padded by wrapping around to the
                # start of glbXNew; the padding rows are discarded below.
                btcYNewPby = session.run(tfwYNewPby,
                    feed_dict = {tfwXNew: glbXNew[obsIx:obsEnd, :] \
                        if obsEnd != lclYNewPby.shape[0] \
                        else np.vstack((glbXNew[obsIx:obsEnd, :],
                            glbXNew[0:((obsIx + btcSz) % obsEnd), :]))
                        })
                lclYNewPby[obsIx:obsEnd, :] = btcYNewPby[:, :] \
                    if obsEnd != lclYNewPby.shape[0] \
                    else btcYNewPby[:(obsEnd - obsIx), :]
            assert (lclYNewPby[:, :] != -1.0).all(), \
                'some cells in lclYNewPby == -1.0'
            # lclYNewPdn = tfwYNew.eval()
            # lclYNewPby = tfwYNewPby.eval()
            lclYNewPdn = np.argmax(lclYNewPby, 1)
            #if (tfwYNew.eval() > -1).any():
            # Only score when the new obs carry more than one distinct true
            # class (otherwise the labels are presumably placeholders).
            if (len(np.unique(glbYNew, return_counts = True)[0]) > 1):
                mdlDf['accNew'] = accNew = accuracy_score(lclYNewPby,
                                                          glbYNew)
                mdlDf['logLossNew'] = logLossNew = skl_metrics.log_loss(
                    glbYNew, lclYNewPby)
                if verbose:
                    print '\n New accuracy:%0.4f' % (accNew)
                    print ' New logLoss:%0.4f' % (logLossNew)
                    print skl_metrics.confusion_matrix(glbObsNewRsp,
                                                       lclYNewPdn)
                    yLbl = [glbRspClassDesc[glbRspClass[ix]] + ':' +
                            glbRspClass[ix] + ':actl' \
                            for ix in xrange(glbRspClassN)]
                    xLbl = ['pred:' + glbRspClass[ix] + ':' + \
                            glbRspClassDesc[glbRspClass[ix]] \
                            for ix in xrange(glbRspClassN)]
                    # print labels
                    plt.matshow(skl_metrics.confusion_matrix(glbObsNewRsp,
                                                             lclYNewPdn),
                                cmap='Reds', interpolation='none')
                    plt.yticks(np.arange(10), yLbl)
                    plt.xticks(np.arange(10), xLbl, rotation=90);
                    plt.show()
            if visualize:
                mydisplayImagePredictions(session, tfwW1.eval(),
                    glbINew, glbObsNewFtr, glbObsNewRsp, lclYNewPby,
                    glbRspClass, glbRspClassDesc, imgVisualFn = plot_occlusion,
                    tfwXOcc = tfwXNew, tfwYOccPby = tfwYNewPby)
            # Record the class-count distribution of the new predictions.
            mdlDf['predNew'] = None
            mdlDf.set_value(0, 'predNew', {'kntCls' : np.unique(lclYNewPdn,
                                                return_counts = True)})
            if verbose:
                print '\n New prediction knts:'
                print mdlDf['predNew'][0]
        # indentation (6 spaces) determines scope of this
        # before session.__exit__ & graph.__exit__
        mdlDf['model'] = session
    mdlDf['duration'] = (datetime.now() - startTm).seconds
    print(' duration: %.2d seconds' % (mdlDf['duration'][0]))
    if not newObs: lclYNewPby = None
    return(mdlDf, lclYVldPby, lclYNewPby)
# Smoke tests: run the model function once per option flag to check that
# results are deterministic and each run option works separately.
tmpMdlDf = pd.DataFrame()
# thsMdlDf, thsYVldPby, thsYNewPby = fitMdlMLPRELUSGDTfw(
#     glbXFit, glbYFit,
#     nObsFit = 100, nObsBtc = 16,
#     rotatePby = 0.2, rotateMaxAgl = 10,
#     nRELUs = 128,
#     nStepsTfw = 10, lrnRateTfw = 0.5,
#     visualize = True, newObs = True, verbose = True)
# tmpMdlDf = tmpMdlDf.append(thsMdlDf)
# To check if model results are deterministic &
# all run options work separately
thsMdlDf, thsYVldPby, thsYNewPby = fitMdlMLPRELUSGDTfw(
    glbXFit, glbYFit,
    nObsFit = 100, nObsBtc = 16,
    rotatePby = 0.2, rotateMaxAgl = 10,
    nRELUs = 128,
    nStepsTfw = 10, lrnRateTfw = 0.5,
    visualize = True, newObs = False, verbose = False)
tmpMdlDf = tmpMdlDf.append(thsMdlDf)
thsMdlDf, thsYVldPby, thsYNewPby = fitMdlMLPRELUSGDTfw(
    glbXFit, glbYFit,
    nObsFit = 100, nObsBtc = 16,
    rotatePby = 0.2, rotateMaxAgl = 10,
    nRELUs = 128,
    nStepsTfw = 10, lrnRateTfw = 0.5,
    visualize = False, newObs = True, verbose = False)
tmpMdlDf = tmpMdlDf.append(thsMdlDf)
thsMdlDf, thsYVldPby, thsYNewPby = fitMdlMLPRELUSGDTfw(
    glbXFit, glbYFit,
    nObsFit = 100, nObsBtc = 16,
    rotatePby = 0.2, rotateMaxAgl = 10,
    nRELUs = 128,
    nStepsTfw = 10, lrnRateTfw = 0.5,
    visualize = False, newObs = False, verbose = True)
tmpMdlDf = tmpMdlDf.append(thsMdlDf)
# Baseline run with every option off (determinism reference).
thsMdlDf, thsYVldPby, thsYNewPby = fitMdlMLPRELUSGDTfw(
    glbXFit, glbYFit,
    nObsFit = 100, nObsBtc = 16,
    rotatePby = 0.2, rotateMaxAgl = 10,
    nRELUs = 128,
    nStepsTfw = 10, lrnRateTfw = 0.5,
    visualize = False, newObs = False, verbose = False)
tmpMdlDf = tmpMdlDf.append(thsMdlDf)
print '\ntmpMdlDf: '
print(tmpMdlDf)
# glbMdlDf = None
# glbMdlDf = pd.DataFrame()
# Load previously saved model results; fall back to an empty frame so the
# notebook keeps working when the pickle is absent or unreadable.
# (The original used Python-2-only `except IOError, e` and left glbMdlDf
# undefined on failure, making the final print raise NameError.)
glbMdlDf = pd.DataFrame()
try:
    with open(glbPickleFile['models'], 'rb') as f:
        glbMdlDf = pickle.load(f)
    assert isinstance(glbMdlDf, pd.DataFrame), \
        'type(glbMdlDf): %s, expecting pd.DataFrame' % \
        (str(type(glbMdlDf)))
except IOError as e:
    print(e)
    print('file %s not present or not appropriate' %
          (glbPickleFile['models']))
print(glbMdlDf)
%run img_utils.py
# Hyper-parameter grid for mysearchParams; the commented alternatives record
# the ranges explored in earlier runs.
srchParamsDct = {
    'nObsFit' : [1000, 5000, 10000, glbObsFitFtr.shape[0]],
    # 'nObsFit' : [1000, 5000, 6000, 7000, 8000, 9000, 10000,
    #              glbObsFitFtr.shape[0], glbObsTrnFtr.shape[0]]
    'nObsBtc' : [8],
    # 'nObsBtc' : [4, 8, 16, 32],
    'rotatePby' : [0.1],
    # 'rotatePby' : [0.0, 0.1, 0.2, 0.5],
    'rotateMaxAgl' : [2],
    # 'rotateMaxAgl' : [0, 1, 10],
    'nRELUs' : [512],
    # 'nRELUs' : [16, 32, 64, 128, 256, 512, 1024],
    'nStepsTfw' : [2260],
    # 'nStepsTfw' : [100, 1000,
    #                1130 (min. for nObsBtc = 16),
    #                2260 (min. for nObsBtc = 8),
    #                4520 (min. for nObsBtc = 4),
    #                10000],
    'lrnRateTfw' : [0.01]
    # 'lrnRateTfw' : [0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 7.0, 10.0]
    }
# Display-only pass: show how the grid merges with the saved results
# without actually fitting anything.
jnk = mysearchParams(fitMdlMLPRELUSGDTfw, srchParamsDct = srchParamsDct,
    curResultsDf = glbMdlDf,
    mode = 'displayonly',
    sort_values = ['nObsFit', 'accVld', 'logLossVld', 'duration'],
    sort_ascending = [False , True , False, False],
    save_drop_cols = 'model',
    save_filepathname = glbPickleFile['models'],
    lclXFit = glbXFit, lclYFit = glbYFit)
# thsDf, thsObsVldRspPredProba, thsObsNewRspPredProba = fitMdlLgtRgrTfw(
# glbXFit, glbYFit,
# nObsFit = 100, nStepsTfw = 10, lrnRateTfw = 0.5,
# visualize = False, newObs = False, verbose = False)
%run img_utils.py
# Full grid run: fits each parameter combination, merges with saved results,
# sorts by the headline metrics, and persists (model objects dropped first).
glbMdlDf = mysearchParams(fitMdlMLPRELUSGDTfw, srchParamsDct = srchParamsDct,
    curResultsDf = glbMdlDf,
    mode = 'run',
    sort_values = ['nObsFit', 'accVld', 'logLossVld', 'duration'],
    sort_ascending = [False , False , True, True],
    save_filepathname = glbPickleFile['models'],
    save_drop_cols = 'model',
    lclXFit = glbXFit, lclYFit = glbYFit)
# Flag the chosen model via its full (id + hyper-params) index tuple.
glbMdlDf['bestFit'] = False
# NOTE(review): .ix is deprecated (removed in pandas 1.0); also the index
# tuple below names a 'LgtRgr.SGD.tfw' row rather than an MLP row --
# confirm that is intended.
glbMdlDf.ix[(
    'LgtRgr.SGD.tfw', 1000.0, 18077.0, 0, 0.1, 8.0, 3.0, 2),
    # LgtRgr.SGD.tfw 1000.0 18077.0 0, 0.1 8.0 3.0 2
    # id nStepsTfw nObsFit nRELUs, rotatePby nObsBtc lrnRateTfw rotateMaxAgl
    'bestFit'] = True
print glbMdlDf[glbMdlDf['bestFit']]
print glbMdlDf[['accVld', 'bestFit', 'id']].head().to_string(index = False)
# print glbMdlDf[glbMdlDf.nObsFit >= 10000][
#     list(set(glbMdlDf.columns) - set(srchParamsDct.keys()))]
# NOTE(review): str.contains treats 'MLP.SGD.tfw' as a regex; the dots match
# any character -- escape them if exact matching matters.
mask = glbMdlDf['bestFit'].values | \
    glbMdlDf['id'].str.contains('MLP.SGD.tfw', na = False).values
print glbMdlDf[mask]['accVld']
# Set value based on condition
# print glbMdlDf.ix[glbMdlDf['id'].str.contains('LogisticRegression.SGD.tf',
#     na=False), 'id']
# glbMdlDf.ix[glbMdlDf['id'].str.contains('LogisticRegression.SGD.tf',
#     na=False), 'id'] = 'LgtRgr.SGD.tf'
# print glbMdlDf.ix[glbMdlDf['id'].str.contains('LogisticRegression.SGD.tf',
#     na=False), 'id']
def lclfixNanDf(df, column, default):
    """Replace NaNs in df[column] with `default`, printing before/after views.

    Mutates `df` in place and also returns it (fluent convenience).

    Parameters:
        df: pandas DataFrame to repair.
        column: name of the column whose NaNs are filled.
        default: value written into the NaN cells.
    """
    def _show(label):
        # One-line purpose: print the column next to its isnull flags.
        # Work on a copy so adding the helper column does not trigger
        # pandas' SettingWithCopyWarning or touch df itself.
        print(label)
        dspDf = df[[column]].copy()
        dspDf[column + '.isnull'] = df[column].isnull()
        print(dspDf.to_string(index = False))

    _show("\n Before:")
    df.loc[df[column].isnull(), column] = default
    _show("\n After:")
    return(df)
# tmpMdlDf = lclfixNanDf(glbMdlDf, column = 'nRELUs', default = 0)
# print glbMdlDf.ix[- glbMdlDf['nStepsTfw'].isnull(), ['id', 'nStepsTfw']]
# glbMdlDf.ix[- glbMdlDf['nStepsTfw'].isnull(), 'id'] = 'LogisticRegression.tf'
# print glbMdlDf.ix[- glbMdlDf['nStepsTfw'].isnull(), ['id', 'nStepsTfw']]
# print glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), ['nObsBtc', 'nObsFit']]
# # glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), 'nObsBtc'] = \
# # glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), 'nObsFit']
# glbMdlDf['nObsBtc'] = glbMdlDf.apply(
# lambda (row): row['nObsFit'] if pd.isnull(row['nObsBtc']) else row['nObsBtc'],
# axis = 1)
# print 'After:'
# print glbMdlDf.ix[glbMdlDf['nObsBtc'].isnull(), ['nObsBtc', 'nObsFit']]
# Change value
# tmpMdlDf = glbMdlDf
# print tmpMdlDf[(tmpMdlDf['id'].str.contains('LgtRgr.skl', na = False)) &
# (tmpMdlDf['nStepsTfw'] == 1.0)]
# print tmpMdlDf.ix[(tmpMdlDf['id'].str.contains('LgtRgr.skl', na = False)) &
# (tmpMdlDf['nStepsTfw'] == 1.0), 'nStepsTfw']
# tmpMdlDf.ix[(tmpMdlDf['id'].str.contains('LgtRgr.skl', na = False)) &
# (tmpMdlDf['nStepsTfw'] == 1.0), 'nStepsTfw'] = -1.0
# print 'After:'
# print tmpMdlDf
# Remove specific models
# mask = (glbMdlDf['id'].str.contains('MLP.SGD.tfw', na = False))
# mask = (glbMdlDf['id'].str.contains('LgtRgr.tfw', na = False)) & \
# (glbMdlDf['nObsFit'] == 10000.0)
# print mask
# tmpMdlDf = glbMdlDf[~mask]
# print tmpMdlDf.to_string(index = False)
# Remove dups
# Inspect duplicates, then rebuild the (id + hyper-params) index, sort by
# the headline metrics, and persist without the live model objects.
print glbMdlDf.columns
# print (glbMdlDf['logLossVld'])
print (glbMdlDf.index.duplicated())
tmpMdlDf = glbMdlDf[~glbMdlDf.index.duplicated()]
print (tmpMdlDf.index.duplicated())
# glbMdlDf['nObsBtc'] = glbMdlDf['nObsFit']
# glbMdlDf = tmpMdlDf
# print 'After:'
# print glbMdlDf.to_string(index = False)
# print glbMdlDf['bestFit']
# NOTE(review): `['id'] + srchParamsDct.keys()` is Python-2 only (keys()
# returns a view in Python 3; wrap in list() when porting).
glbMdlDf = glbMdlDf.set_index(['id'] + srchParamsDct.keys(), drop = False)
glbMdlDf = glbMdlDf.sort_values(['nObsFit', 'accVld', 'logLossVld', 'duration'],
                                ascending = [False , False , True, True])
print glbMdlDf.head()
myexportDf(glbMdlDf,
           save_filepathname = glbPickleFile['models'],
           save_drop_cols = 'model'
           )
# Convert glbMdlDf to an R data.frame for the ggplot-based model-stat plots.
# NOTE(review): pandas2ri.activate + conversion.py2ri are rpy2 2.x APIs.
robjects.pandas2ri.activate()
pltRDf = robjects.conversion.py2ri(glbMdlDf)
# print(pltRDf)
# pltRFn = robjects.r("""
# source('~/Dropbox/datascience/R/myplot.R')
# function(RDf, filename) {
# mypltModelStats(RDf, c('accVld', 'logLossVld', 'duration'),
# dim = c('nObsFit', 'id',
# 'nRELUs',
# 'nObsBtc', 'rotatePby', 'rotateMaxAgl',
# 'nStepsTfw', 'lrnRateTfw'),
# scaleXFn = NULL,
# #highLightIx = which.min(RDf$logLossVld),
# highLightIx = which(RDf$bestFit == 'TRUE'),
# title = NULL,
# fileName = filename)
# }
# """)
# pltRFn(pltRDf, 'img_05_fit_MLP_RELU_SGD_Tfw_SFDD_glbMdlDf.png')
# pltRFn = robjects.r("""
# source('~/Dropbox/datascience/R/myplot.R')
# function(RDf, filename) {
# mypltModelStats(RDf, c('accVld', 'logLossVld'),
# dim = c('nObsFit', 'id',
# 'nRELUs',
# 'nObsBtc', 'rotatePby', 'rotateMaxAgl',
# 'nStepsTfw', 'lrnRateTfw'),
# scaleXFn = NULL,
# #highLightIx = which.min(RDf$logLossVld),
# highLightIx = which(RDf$bestFit == 'TRUE'),
# title = NULL,
# fileName = filename)
# }
# """)
# pltRFn(pltRDf, 'img_05_fit_MLP_RELU_SGD_Tfw_SFDD_glbMdlDf_logLossVld.png')
# pltRFn = robjects.r("""
# source('~/Dropbox/datascience/R/myplot.R')
# function(RDf, filename) {
# mypltModelStats(RDf, c('accVld'),
# dim = c('nObsFit', 'id',
# 'nRELUs',
# 'nObsBtc', 'rotatePby', 'rotateMaxAgl',
# 'nStepsTfw', 'lrnRateTfw'),
# scaleXFn = NULL,
# #highLightIx = which.min(RDf$logLossVld),
# highLightIx = which(RDf$bestFit == 'TRUE'),
# title = NULL,
# fileName = filename)
# }
# """)
# pltRFn(pltRDf, 'img_05_fit_MLP_RELU_SGD_Tfw_SFDD_glbMdlDf_accVld.png')
# Plot accVld for a hand-picked subset of models (one rbind'ed subset per
# model family, filtered on its tuned hyper-parameters); best-fit rows are
# highlighted. mypltModelStats comes from myplot.R.
pltRFn = robjects.r("""
source('~/Dropbox/datascience/R/myplot.R')
function(RDf, filename) {
pltRDf = rbind(NULL
, subset(RDf, id %in% c("LgtRgr.skl") &
TRUE)
, subset(RDf, id %in% c("LgtRgr.tfw") &
nStepsTfw %in% c(1000) &
lrnRateTfw %in% c(10.0) &
TRUE)
, subset(RDf, id %in% c("LgtRgr.SGD.tfw") &
nStepsTfw %in% c(1000, 2804) &
rotateMaxAgl %in% c(2) &
lrnRateTfw %in% c(3) &
nObsBtc %in% c(8) &
rotatePby %in% c(0.1) &
TRUE)
, subset(RDf, id %in% c("MLP.SGD.tfw") &
rotateMaxAgl %in% c(2) &
lrnRateTfw %in% c(0.01) &
nObsBtc %in% c(8) &
rotatePby %in% c(0.1) &
nRELUs %in% c(512) &
nStepsTfw %in% c(2260) &
TRUE)
)
mypltModelStats(pltRDf,
measure = c('accVld'),
dim = c('nObsFit', 'id',
'nRELUs',
'nStepsTfw',
# 'nObsBtc',
# 'lrnRateTfw',
# 'rotatePby',
# 'rotateMaxAgl',
NULL),
scaleXFn = NULL,
highLightIx = which(pltRDf$bestFit == 'TRUE'),
title = NULL,
fileName = filename)
}
""")
pltRFn(pltRDf, 'img_05_fit_MLP_RELU_SGD_Tfw_SFDD_glbMdlDf_accVldSel.png')
# id nStepsTfw nObsFit rotatePby nObsBtc lrnRateTfw rotateMaxAgl
# Persist the full results table, then refit the selected model on all Fit
# obs ("sel") and the final model on Fit+Vld ("fin"), writing a Kaggle-style
# submission for each.
glbMdlDf.to_csv('img_05_fit_MLP_RELU_SGD_Tfw_SFDD_glbMdlDf.csv')
# selMdlSrs = glbMdlDf[glbMdlDf['bestFit']]
# NOTE(review): '.' in the contains() pattern is a regex wildcard.
mask = (glbMdlDf['id'].str.contains('MLP.SGD.tfw', na = False)) & \
    (glbMdlDf['accVld'] > 0.334)
selMdlSrs = glbMdlDf[mask]
print selMdlSrs
# Refit the selected hyper-parameters on the full Fit split.
selMdlDf, selYVldPby, selYNewPby = fitMdlMLPRELUSGDTfw(
    glbXFit, glbYFit,
    nObsFit = glbXFit.shape[0],
    nObsBtc = selMdlSrs['nObsBtc'][0],
    rotatePby = selMdlSrs['rotatePby'][0],
    rotateMaxAgl = selMdlSrs['rotateMaxAgl'][0],
    nRELUs = selMdlSrs['nRELUs'][0],
    nStepsTfw = selMdlSrs['nStepsTfw'][0],
    lrnRateTfw = selMdlSrs['lrnRateTfw'][0],
    visualize = True, newObs = True, verbose = True)
# thsMdlDf, thsYVldPby, thsYNewPby = fitMdlMLPRELUSGDTfw(
#     glbXFit, glbYFit,
#     nObsFit = 100, nObsBtc = 16,
#     rotatePby = 0.2, rotateMaxAgl = 10,
#     nRELUs = 128,
#     nStepsTfw = 10, lrnRateTfw = 0.5,
#     visualize = True, newObs = False, verbose = False)
print '\n selMdl:'
glbwriteSubmission(glbINew, selYNewPby,
    'img_05_fit_ML_RELU_SGD_Tfw_SFDD_ImgSz_' + str(glbImg['size']) + \
    '_sbmt_sel.csv')
# NOTE(review): this final fit calls fitMdlLgtRgrSGDTfw (the logistic model,
# no nRELUs arg) and uses glbXTrn/glbYTrn, which an earlier cell deleted --
# it looks copied from the img_04 notebook; confirm fitMdlMLPRELUSGDTfw was
# intended here and re-run the reformat cell first.
finMdlDf, finYVldPby, finYNewPby = fitMdlLgtRgrSGDTfw(
    glbXTrn, glbYTrn,
    nObsFit = glbXTrn.shape[0],
    nObsBtc = selMdlSrs['nObsBtc'][0],
    rotatePby = selMdlSrs['rotatePby'][0],
    rotateMaxAgl = selMdlSrs['rotateMaxAgl'][0],
    # Ensure all Trn obs are used at least once
    nStepsTfw = max(selMdlSrs['nStepsTfw'][0],
                    glbXTrn.shape[0] * 1.0 / selMdlSrs['nObsBtc'][0] + 1),
    lrnRateTfw = selMdlSrs['lrnRateTfw'][0],
    visualize = True, newObs = True, verbose = True)
# selMdlDf, selYVldPby, selYNewPby = fitMdlLgtRgrSGDTfw(
#     glbXFit, glbYFit,
#     nObsFit = glbXFit.shape[0],
#     nObsBtc = selMdlSrs['nObsBtc'][0],
#     rotatePby = selMdlSrs['rotatePby'][0],
#     rotateMaxAgl = selMdlSrs['rotateMaxAgl'][0],
#     nStepsTfw = selMdlSrs['nStepsTfw'][0],
#     lrnRateTfw = selMdlSrs['lrnRateTfw'][0],
#     visualize = True, newObs = True, verbose = True)
print finMdlDf
# Merge the final model into the results table and persist again.
glbMdlDf = glbMdlDf.append(finMdlDf)
glbMdlDf = glbMdlDf.set_index(['id'] + srchParamsDct.keys(), drop = False)
glbMdlDf = glbMdlDf.sort_values(
    ['nObsFit', 'accVld', 'logLossVld', 'duration'],
    ascending = [False , True , False, False])
print(glbMdlDf[list(set(glbMdlDf.columns) -
                    set(['id'] + srchParamsDct.keys()))])
myexportDf(glbMdlDf,
           save_filepathname = glbPickleFile['models'],
           save_drop_cols = 'model'
           )
print '\n finMdl:'
# NOTE(review): filename says 'img_04_fit_lgtRgr' -- likely a stale copy of
# the img_04 notebook's name.
glbwriteSubmission(glbINew, finYNewPby,
    'img_04_fit_lgtRgr_SGD_Tfw_SFDD_ImgSz_' + str(glbImg['size']) + \
    '_sbmt_fin.csv')
# Record LeaderBoard results vs. local validation log-loss for reference.
prtStr = 'LeaderBoard metric for this sel submission: %0.5f vs. ' + \
    'logLossVld (sel): %0.5f'
print prtStr % (22.62562, 20.5467)
prtStr = 'LeaderBoard metric for this fin submission: %0.5f vs. ' + \
    'logLossVld (fin): %0.5f'
print prtStr % (23.71528, 4.8369)
print 'Best score yet:%s: %0.5f' % \
    ('img_02_fit_lgtRgr(Skl)_SFDD_(ImgSz_32_)sbmt(_fin).csv', 2.63892)
Following code should be in img04_fit_lgtRgrSGDTf
Let's now switch to stochastic gradient descent training instead, which is much faster.
The graph will be similar, except that instead of holding all the training data in a constant node, we create a Placeholder node which will be fed actual data at every call of session.run().
# --- Legacy notMNIST-assignment cells: SGD logistic regression baseline ---
import pandas as pd
# Results table for the nRELUs sweep further below (row 0 = no hidden layer).
models = pd.DataFrame({'nRELUs': [0]})
#models.ix[0, 'accuracy_scoreTest'] = 0
print models
batch_size = 128
graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tfwObsFitFtr = tf.placeholder(tf.float32,
        shape=(batch_size, glbImg['size'] * glbImg['size']))
    tfwObsFitRsp = tf.placeholder(tf.float32, shape=(batch_size, glbRspClassN))
    tfwObsVldFtr = tf.constant(glbXVld)
    tfwObsNewFtr = tf.constant(glbXNew)
    # Variables: single affine layer (weights + bias).
    tfwW = tf.Variable(
        tf.truncated_normal([glbImg['size'] * glbImg['size'], glbRspClassN]))
    tfwB = tf.Variable(tf.zeros([glbRspClassN]))
    print(tfwW.initialized_value())
    print(tfwB.initialized_value())
    # Training computation.
    logits = tf.matmul(tfwObsFitFtr, tfwW) + tfwB
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tfwObsFitRsp))
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # Predictions for the training, validation, and test data.
    tfwObsTrnPred = tf.nn.softmax(logits)
    tfwObsVldPred = tf.nn.softmax(
        tf.matmul(tfwObsVldFtr, tfwW) + tfwB)
    tfwObsNewPred = tf.nn.softmax(tf.matmul(tfwObsNewFtr, tfwW) + tfwB)
Let's run it:
# Train the SGD logistic-regression graph and record its scores in `models`.
nStepsTfw = 3001
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print("Initialized")
    for step in range(nStepsTfw):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (glbYFit.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = glbXFit[offset:(offset + batch_size), :]
        batch_labels = glbYFit[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tfwObsFitFtr : batch_data, tfwObsFitRsp : batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, tfwObsTrnPred], feed_dict=feed_dict)
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            # NOTE(review): accuracy_score here must be a notebook-level
            # helper; the only definition visible in this file is local to
            # fitMdlMLPRELUSGDTfw -- confirm it exists at this scope.
            print("Minibatch accuracy_score: %.1f%%" % accuracy_score(predictions, batch_labels))
            print("Validation accuracy_score: %.1f%%" % accuracy_score(
                tfwObsVldPred.eval(), glbYVld))
    print("Test accuracy_score: %.1f%%" % accuracy_score(tfwObsNewPred.eval(), glbYNew))
    models.ix[0, 'accuracy_scoreVld'] = accuracy_score(tfwObsVldPred.eval(), glbYVld)
    models.ix[0, 'accuracy_scoreTst'] = accuracy_score( tfwObsNewPred.eval(), glbYNew)
    models.ix[0, 'graph'] = graph
print(models)
Turn the logistic regression example with SGD into a 1-hidden layer neural network with rectified linear units (nn.relu()) and 1024 hidden nodes. This model should improve your validation / test accuracy_score.
# Candidate hidden-layer widths: powers of 2 from 1 to 1024; seed one result
# row per width.
nRELUs = [2 ** thsRelu for thsRelu in xrange(11)]
print(nRELUs)
for thsRelu in nRELUs:
    models.ix[thsRelu, 'nRELUs'] = thsRelu
print models
# Build a 1-hidden-layer RELU network for a single width (2**9 = 512).
thsRelu = nRELUs[9]
batch_size = 128
graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tfwObsFitFtr = tf.placeholder(tf.float32,
        shape=(batch_size, glbImg['size'] * glbImg['size']))
    tfwObsFitRsp = tf.placeholder(tf.float32, shape=(batch_size, glbRspClassN))
    tfwObsVldFtr = tf.constant(glbXVld)
    tfwObsNewFtr = tf.constant(glbXNew)
    # Variables: input->hidden (W1, B1) and hidden->output (W2, B2).
    tfwW1 = tf.Variable(
        tf.truncated_normal([glbImg['size'] * glbImg['size'], thsRelu]), name = 'tfwW1')
    tfwB1 = tf.Variable(tf.zeros([thsRelu]), name = 'tfwB1')
    tfwW2 = tf.Variable(
        tf.truncated_normal([thsRelu, glbRspClassN]), name = 'tfwW2')
    tfwB2 = tf.Variable(tf.zeros([glbRspClassN]), name = 'tfwB2')
    print(tfwW1.initialized_value())
    print(tfwB1.initialized_value())
    #print(relus.initialized_value())
    print(tfwW2.initialized_value())
    print(tfwB2.initialized_value())
    #tf.Print(relus, [relus])
    # Training computation: affine -> RELU -> affine (logits).
    layer1 = tf.matmul(tfwObsFitFtr, tfwW1) + tfwB1
    layer2 = tf.nn.relu(layer1)
    layer3 = tf.matmul(layer2, tfwW2) + tfwB2
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(layer3, tfwObsFitRsp))
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # Predictions for the training, validation, and test data.
    tfwObsTrnPred = tf.nn.softmax(layer3)
    tfwObsVldPred = tf.nn.softmax(
        tf.matmul(tf.nn.relu(tf.matmul(tfwObsVldFtr, tfwW1) + tfwB1), tfwW2) + tfwB2)
    tfwObsNewPred = tf.nn.softmax(
        tf.matmul(tf.nn.relu(tf.matmul(tfwObsNewFtr, tfwW1) + tfwB1), tfwW2) + tfwB2)
# Train the MLP and record its scores under the row for this width.
nStepsTfw = 3001
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print("Initialized")
    for step in range(nStepsTfw):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (glbYFit.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = glbXFit[offset:(offset + batch_size), :]
        batch_labels = glbYFit[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tfwObsFitFtr : batch_data, tfwObsFitRsp : batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, tfwObsTrnPred], feed_dict=feed_dict)
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy_score: %.1f%%" % accuracy_score(predictions, batch_labels))
            print("Validation accuracy_score: %.1f%%" % accuracy_score(
                tfwObsVldPred.eval(), glbYVld))
    print("Test accuracy_score: %.1f%%" % accuracy_score(tfwObsNewPred.eval(), glbYNew))
    models.ix[thsRelu, 'accuracy_scoreVld'] = accuracy_score(tfwObsVldPred.eval(), glbYVld)
    models.ix[thsRelu, 'accuracy_scoreTst'] = accuracy_score( tfwObsNewPred.eval(), glbYNew)
    models.ix[thsRelu, 'graph'] = graph
print(models)
# Plot validation / test accuracy vs. hidden-layer width (log2 x-axis).
plt.figure()
#plt.plot(models['nRELUs'], models['accuracy_score.fit'], 'bo-', label = 'fit')
plt.plot(models['nRELUs'], models['accuracy_scoreVld'], 'rs-', label = 'vld')
plt.plot(models['nRELUs'], models['accuracy_scoreTst'], 'gp-', label = 'new')
plt.legend(loc = 'lower right')
plt.title("accuracy_score")
plt.xscale('symlog', basex=2)
axes = plt.gca()
axes.set_xlabel('nRELUs')
# axes.set_xlim([mdlDF['l1_penalty'][mdlDF['RSS.vld'].argmin()] / 10 ** 2, \
#     mdlDF['l1_penalty'][mdlDF['RSS.vld'].argmin()] * 10 ** 2])
# axes.set_ylim([0, mdlDF['RSS.vld'].min() * 1.5])
plt.show()